import pandas as pd
import os
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.model_selection import train_test_split
from node2vec import Node2Vec
from node2vec.edges import HadamardEmbedder, AverageEmbedder, WeightedL1Embedder, WeightedL2Embedder
# sklearn
import sklearn
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score, f1_score
df = pd.read_csv("~/Desktop/fraudTrain.csv")
df = df[df["is_fraud"]==0].sample(frac=0.20, random_state=42).append(df[df["is_fraud"] == 1])
df.head()
/tmp/ipykernel_3417592/1991497680.py:2: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
df = df[df["is_fraud"]==0].sample(frac=0.20, random_state=42).append(df[df["is_fraud"] == 1])
| | Unnamed: 0 | trans_date_trans_time | cc_num | merchant | category | amt | first | last | gender | street | ... | lat | long | city_pop | job | dob | trans_num | unix_time | merch_lat | merch_long | is_fraud |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
669418 | 669418 | 2019-10-12 18:21 | 4.089100e+18 | fraud_Haley, Jewess and Bechtelar | shopping_pos | 7.53 | Debra | Stark | F | 686 Linda Rest | ... | 32.3836 | -94.8653 | 24536 | Multimedia programmer | 1983-10-14 | d313353fa30233e5fab5468e852d22fc | 1350066071 | 32.202008 | -94.371865 | 0 |
32567 | 32567 | 2019-01-20 13:06 | 4.247920e+12 | fraud_Turner LLC | travel | 3.79 | Judith | Moss | F | 46297 Benjamin Plains Suite 703 | ... | 39.5370 | -83.4550 | 22305 | Television floor manager | 1939-03-09 | 88c65b4e1585934d578511e627fe3589 | 1327064760 | 39.156673 | -82.930503 | 0 |
156587 | 156587 | 2019-03-24 18:09 | 4.026220e+12 | fraud_Klein Group | entertainment | 59.07 | Debbie | Payne | F | 204 Ashley Neck Apt. 169 | ... | 41.5224 | -71.9934 | 4720 | Broadcast presenter | 1977-05-18 | 3bd9ede04b5c093143d5e5292940b670 | 1332612553 | 41.657152 | -72.595751 | 0 |
1020243 | 1020243 | 2020-02-25 15:12 | 4.957920e+12 | fraud_Monahan-Morar | personal_care | 25.58 | Alan | Parsons | M | 0547 Russell Ford Suite 574 | ... | 39.6171 | -102.4776 | 207 | Network engineer | 1955-12-04 | 19e16ee7a01d229e750359098365e321 | 1361805120 | 39.080346 | -103.213452 | 0 |
116272 | 116272 | 2019-03-06 23:19 | 4.178100e+15 | fraud_Kozey-Kuhlman | personal_care | 84.96 | Jill | Flores | F | 639 Cruz Islands | ... | 41.9488 | -86.4913 | 3104 | Horticulturist, commercial | 1981-03-29 | a0c8641ca1f5d6e243ed5a2246e66176 | 1331075954 | 42.502065 | -86.732664 | 0 |
5 rows × 23 columns
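The FutureWarning above is raised because `DataFrame.append` is deprecated; the same sampling step can be written with `pd.concat`. A minimal sketch, assuming `df` is the freshly loaded frame (the helper names are just for illustration):

df_fraud = df[df["is_fraud"] == 1]
df_nonfraud = df[df["is_fraud"] == 0].sample(frac=0.20, random_state=42)
df = pd.concat([df_nonfraud, df_fraud])  # replaces the deprecated .append call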
# def throw(df, fraud_rate): # downsamples normal transactions to match the target fraud rate!
# df1 = df[df['is_fraud'] == 1].copy()
# df0 = df[df['is_fraud'] == 0].copy()
# df0_downsample = (len(df1) * (1-fraud_rate)) / (len(df0) * fraud_rate)
# df0_down = df0.sample(frac=df0_downsample, random_state=42)
# df_p = pd.concat([df1, df0_down])
# return df_p
# def split_dataframe(data_frame, test_fraud_rate, test_rate=0.3):
# n = len(data_frame)
# # separate fraud and normal transactions
# fraud_data = data_frame[data_frame['is_fraud'] == 1]
# normal_data = data_frame[data_frame['is_fraud'] == 0]
# # compute the test split sizes
# test_samples = int(test_fraud_rate * (n * test_rate))
# remaining_test_samples = int(n * test_rate) - test_samples
# # randomly draw test rows from the fraud and normal transactions
# test_fraud_data = fraud_data.sample(n=test_samples, replace=False)
# test_normal_data = normal_data.sample(n=remaining_test_samples, replace=False)
# # combine the test data
# test_data = pd.concat([test_normal_data, test_fraud_data])
# # build the training data
# train_data = data_frame[~data_frame.index.isin(test_data.index)]
# return train_data, test_data
# def concat(df_tr, df_tst):
# df = pd.concat([df_tr, df_tst])
# train_mask = np.concatenate((np.full(len(df_tr), True), np.full(len(df_tst), False))) # to keep the indexes from getting tangled? ★ (hmm, really?)
# test_mask = np.concatenate((np.full(len(df_tr), False), np.full(len(df_tst), True)))
# mask = (train_mask, test_mask)
# return df, mask
def evaluation(y, yhat):
    metrics = [sklearn.metrics.accuracy_score,
               sklearn.metrics.precision_score,
               sklearn.metrics.recall_score,
               sklearn.metrics.f1_score,
               sklearn.metrics.roc_auc_score]
    return pd.DataFrame({m.__name__: [m(y, yhat).round(6)] for m in metrics})
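A quick sanity check of `evaluation` on toy labels (illustrative values only, not data from this post):

y_toy = np.array([0, 0, 1, 1])
yhat_toy = np.array([0, 1, 1, 1])
evaluation(y_toy, yhat_toy)  # one-row DataFrame: accuracy, precision, recall, f1, roc_auc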
# def compute_time_difference(group):
# n = len(group)
# result = []
# for i in range(n):
# for j in range(n):
# time_difference = abs((group.iloc[i].trans_date_trans_time - group.iloc[j].trans_date_trans_time).total_seconds())
# result.append([group.iloc[i].name, group.iloc[j].name, time_difference])
# return result
# def edge_index_save(df, unique_col, theta, gamma):
# groups = df.groupby(unique_col)
# edge_index = np.array([item for sublist in (compute_time_difference(group) for _, group in groups) for item in sublist])
# edge_index = edge_index.astype(np.float64)
# filename = f"edge_index_attempt{self.save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
# while os.path.exists(filename):
# self.save_attempt += 1
# filename = f"edge_index_attempt{self.save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
# np.save(filename, edge_index)
# #tetha = edge_index_plust_itme[:,].mean()
# edge_index[:,2] = (np.exp(-edge_index[:,2]/(theta)) != 1)*(np.exp(-edge_index[:,2]/(theta))).tolist()
# edge_index = torch.tensor([(int(row[0]), int(row[1])) for row in edge_index if row[2] > gamma], dtype=torch.long).t()
# return edge_index
# def edge_index(df, unique_col, theta, gamma):
# groups = df.groupby(unique_col)
# edge_index = np.array([item for sublist in (compute_time_difference(group) for _, group in groups) for item in sublist])
# edge_index = edge_index.astype(np.float64)
# # filename = f"edge_index_attempt{self.save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
# # while os.path.exists(filename):
# # self.save_attempt += 1
# # filename = f"edge_index_attempt{self.save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
# # np.save(filename, edge_index)
# #tetha = edge_index_plust_itme[:,].mean()
# edge_index[:,2] = (np.exp(-edge_index[:,2]/(theta)) != 1)*(np.exp(-edge_index[:,2]/(theta))).tolist()
# edge_index = torch.tensor([(int(row[0]), int(row[1])) for row in edge_index if row[2] > gamma], dtype=torch.long).t()
# return edge_index
Tripartite graph
def build_graph_tripartite(df_input, graph_type=nx.Graph()):
    df = df_input.copy()
    mapping = {x: node_id for node_id, x in enumerate(set(df.index.values.tolist() +
                                                          df["cc_num"].values.tolist() +
                                                          df["merchant"].values.tolist()))}
    df["in_node"] = df["cc_num"].apply(lambda x: mapping[x])
    df["out_node"] = df["merchant"].apply(lambda x: mapping[x])
    G = nx.from_edgelist([(x["in_node"], mapping[idx]) for idx, x in df.iterrows()] +
                         [(x["out_node"], mapping[idx]) for idx, x in df.iterrows()],
                         create_using=graph_type)
    nx.set_edge_attributes(G, {(x["in_node"], mapping[idx]): x["is_fraud"] for idx, x in df.iterrows()}, "label")
    nx.set_edge_attributes(G, {(x["out_node"], mapping[idx]): x["is_fraud"] for idx, x in df.iterrows()}, "label")
    nx.set_edge_attributes(G, {(x["in_node"], mapping[idx]): x["amt"] for idx, x in df.iterrows()}, "weight")
    nx.set_edge_attributes(G, {(x["out_node"], mapping[idx]): x["amt"] for idx, x in df.iterrows()}, "weight")
    return G
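`build_graph_tripartite` gives every transaction (row index), card number (`cc_num`), and merchant its own node id, then connects each transaction node to its card node and to its merchant node, storing `is_fraud` as the edge `label` and `amt` as the edge `weight`. A small inspection sketch (run on a small slice only, since `iterrows` is slow; `G_tmp` is just an illustrative name):

G_tmp = build_graph_tripartite(df.head(100))
print(G_tmp.number_of_edges())                                    # two edges per transaction row
print(list(nx.get_edge_attributes(G_tmp, "label").items())[:2])   # ((node, node), is_fraud) samples
print(list(nx.get_edge_attributes(G_tmp, "weight").items())[:2])  # ((node, node), amt) samples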
Supervised learning
from sklearn.utils import resample

df_majority = df[df.is_fraud==0]
df_minority = df[df.is_fraud==1]

df_maj_dowsampled = resample(df_majority,
                             n_samples=len(df_minority),
                             random_state=42)

df_downsampled = pd.concat([df_minority, df_maj_dowsampled])

print(df_downsampled.is_fraud.value_counts())
G_down = build_graph_tripartite(df_downsampled)
1 6006
0 6006
Name: is_fraud, dtype: int64
from sklearn.model_selection import train_test_split

train_edges, test_edges, train_labels, test_labels = train_test_split(list(range(len(G_down.edges))),
                                                                      list(nx.get_edge_attributes(G_down, "label").values()),
                                                                      test_size=0.20,
                                                                      random_state=42)

edgs = list(G_down.edges)
train_graph = G_down.edge_subgraph([edgs[x] for x in train_edges]).copy()
train_graph.add_nodes_from(list(set(G_down.nodes) - set(train_graph.nodes)))
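`edge_subgraph` keeps only the sampled training edges, and `add_nodes_from` puts the dropped nodes back in, so node2vec still learns a vector for every node of `G_down`. A quick check (sketch):

assert set(train_graph.nodes) == set(G_down.nodes)                   # every node is kept
print(len(G_down.edges) - len(train_graph.edges), "edges held out")  # roughly 20% of the edges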
from node2vec import Node2Vec
from node2vec.edges import HadamardEmbedder, AverageEmbedder, WeightedL1Embedder, WeightedL2Embedder

node2vec_train = Node2Vec(train_graph, weight_key='weight')
model_train = node2vec_train.fit(window=10)
Generating walks (CPU: 1): 100%|██████████| 10/10 [00:25<00:00, 2.55s/it]
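`Node2Vec` is called with the package defaults here; the 10/10 walk progress bar and the 128 feature columns AutoGluon reports later are consistent with the default `num_walks` and `dimensions`. An explicit version of the call, with the values written out (they mirror what the defaults appear to be, not tuned choices):

node2vec_train = Node2Vec(train_graph,
                          dimensions=128,        # embedding size -> the 128 X_i columns below
                          num_walks=10,          # matches the 10/10 progress bar above
                          weight_key='weight')
model_train = node2vec_train.fit(window=10)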
classes = [HadamardEmbedder]  # , AverageEmbedder, WeightedL1Embedder, WeightedL2Embedder]
for cl in classes:
    embeddings_train = cl(keyed_vectors=model_train.wv)

train_embeddings = [embeddings_train[str(edgs[x][0]), str(edgs[x][1])] for x in train_edges]
test_embeddings = [embeddings_train[str(edgs[x][0]), str(edgs[x][1])] for x in test_edges]

# create the feature DataFrame
columns = [f'X_{i}' for i in range(np.array(train_embeddings).shape[1])]
df_data = pd.DataFrame(data=train_embeddings, columns=columns)

df_labels = pd.DataFrame(data=train_labels, columns=['label'])

# concatenate the DataFrames
df = pd.concat([df_data, df_labels], axis=1)

label = np.array(train_labels)
predictr = TabularPredictor(label='label')
No path specified. Models will be saved in: "AutogluonModels/ag-20240127_073147/"
predictr.fit(df)
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240127_073147/"
AutoGluon Version: 0.8.2
Python Version: 3.8.18
Operating System: Linux
Platform Machine: x86_64
Platform Version: #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov 2 18:01:13 UTC 2
Disk Space Avail: 600.47 GB / 982.82 GB (61.1%)
Train Data Rows: 19067
Train Data Columns: 128
Label Column: label
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
2 unique label values: [0, 1]
If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping: class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
Available Memory: 18703.56 MB
Train Data (Original) Memory Usage: 9.76 MB (0.1% of available memory)
Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
Stage 1 Generators:
Fitting AsTypeFeatureGenerator...
Stage 2 Generators:
Fitting FillNaFeatureGenerator...
Stage 3 Generators:
Fitting IdentityFeatureGenerator...
Stage 4 Generators:
Fitting DropUniqueFeatureGenerator...
Stage 5 Generators:
Fitting DropDuplicatesFeatureGenerator...
Types of features in original data (raw dtype, special dtypes):
('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
Types of features in processed data (raw dtype, special dtypes):
('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
0.2s = Fit runtime
128 features in original data used to generate 128 features in processed data.
Train Data (Processed) Memory Usage: 9.76 MB (0.1% of available memory)
Data preprocessing and feature engineering runtime = 0.21s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 17160, Val Rows: 1907
User-specified model hyperparameters to be fit:
{
'NN_TORCH': {},
'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
'CAT': {},
'XGB': {},
'FASTAI': {},
'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif ...
0.7677 = Validation score (accuracy)
0.05s = Training runtime
0.29s = Validation runtime
Fitting model: KNeighborsDist ...
0.7666 = Validation score (accuracy)
0.05s = Training runtime
0.29s = Validation runtime
Fitting model: LightGBMXT ...
0.7635 = Validation score (accuracy)
2.77s = Training runtime
0.02s = Validation runtime
Fitting model: LightGBM ...
0.7368 = Validation score (accuracy)
2.84s = Training runtime
0.01s = Validation runtime
Fitting model: RandomForestGini ...
0.7467 = Validation score (accuracy)
4.3s = Training runtime
0.04s = Validation runtime
Fitting model: RandomForestEntr ...
0.7488 = Validation score (accuracy)
6.1s = Training runtime
0.04s = Validation runtime
Fitting model: CatBoost ...
0.7556 = Validation score (accuracy)
9.29s = Training runtime
0.0s = Validation runtime
Fitting model: ExtraTreesGini ...
0.763 = Validation score (accuracy)
0.72s = Training runtime
0.04s = Validation runtime
Fitting model: ExtraTreesEntr ...
0.7609 = Validation score (accuracy)
0.75s = Training runtime
0.04s = Validation runtime
Fitting model: NeuralNetFastAI ...
0.7892 = Validation score (accuracy)
11.87s = Training runtime
0.02s = Validation runtime
Fitting model: XGBoost ...
0.7593 = Validation score (accuracy)
10.34s = Training runtime
0.03s = Validation runtime
Fitting model: NeuralNetTorch ...
0.7383 = Validation score (accuracy)
16.03s = Training runtime
0.09s = Validation runtime
Fitting model: LightGBMLarge ...
0.7646 = Validation score (accuracy)
11.8s = Training runtime
0.03s = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
0.8076 = Validation score (accuracy)
0.81s = Training runtime
0.0s = Validation runtime
AutoGluon training complete, total runtime = 79.69s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240127_073147/")
[1000] valid_set's binary_error: 0.24279
<autogluon.tabular.predictor.predictor.TabularPredictor at 0x7fa1ed618dc0>
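Besides the best-model line in the log, the fitted predictor can list each model's validation score, e.g. (AutoGluon 0.8.x):

predictr.leaderboard(silent=True)  # per-model validation accuracy, fit and predict times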
test = np.array(test_embeddings)
columns = [f'X_{i}' for i in range(test.shape[1])]

# create the test DataFrame
test_df = pd.DataFrame(data=test, columns=columns)
y = np.array(test_labels)
yhat = predictr.predict(test_df)
evaluation(y,yhat)
|   | accuracy_score | precision_score | recall_score | f1_score | roc_auc_score |
|---|---|---|---|---|---|
| 0 | 0.498427 | 0.7 | 0.005836 | 0.011575 | 0.501651 |
classes = [AverageEmbedder]  # , HadamardEmbedder, WeightedL1Embedder, WeightedL2Embedder]
for cl in classes:
    embeddings_train = cl(keyed_vectors=model_train.wv)

train_embeddings = [embeddings_train[str(edgs[x][0]), str(edgs[x][1])] for x in train_edges]
test_embeddings = [embeddings_train[str(edgs[x][0]), str(edgs[x][1])] for x in test_edges]

# create the feature DataFrame
columns = [f'X_{i}' for i in range(np.array(train_embeddings).shape[1])]
df_data = pd.DataFrame(data=train_embeddings, columns=columns)

df_labels = pd.DataFrame(data=train_labels, columns=['label'])

# concatenate the DataFrames
df = pd.concat([df_data, df_labels], axis=1)

label = np.array(train_labels)

predictr = TabularPredictor(label='label')
predictr.fit(df)

test = np.array(test_embeddings)
columns = [f'X_{i}' for i in range(test.shape[1])]

# create the test DataFrame
test_df = pd.DataFrame(data=test, columns=columns)
y = np.array(test_labels)
yhat = predictr.predict(test_df)

evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240127_073500/"
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240127_073500/"
AutoGluon Version: 0.8.2
Python Version: 3.8.18
Operating System: Linux
Platform Machine: x86_64
Platform Version: #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov 2 18:01:13 UTC 2
Disk Space Avail: 599.89 GB / 982.82 GB (61.0%)
Train Data Rows: 19067
Train Data Columns: 128
Label Column: label
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
2 unique label values: [0, 1]
If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping: class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
Available Memory: 18646.15 MB
Train Data (Original) Memory Usage: 9.76 MB (0.1% of available memory)
Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
Stage 1 Generators:
Fitting AsTypeFeatureGenerator...
Stage 2 Generators:
Fitting FillNaFeatureGenerator...
Stage 3 Generators:
Fitting IdentityFeatureGenerator...
Stage 4 Generators:
Fitting DropUniqueFeatureGenerator...
Stage 5 Generators:
Fitting DropDuplicatesFeatureGenerator...
Types of features in original data (raw dtype, special dtypes):
('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
Types of features in processed data (raw dtype, special dtypes):
('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
0.1s = Fit runtime
128 features in original data used to generate 128 features in processed data.
Train Data (Processed) Memory Usage: 9.76 MB (0.1% of available memory)
Data preprocessing and feature engineering runtime = 0.16s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 17160, Val Rows: 1907
User-specified model hyperparameters to be fit:
{
'NN_TORCH': {},
'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
'CAT': {},
'XGB': {},
'FASTAI': {},
'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif ...
0.7252 = Validation score (accuracy)
0.05s = Training runtime
0.29s = Validation runtime
Fitting model: KNeighborsDist ...
0.7257 = Validation score (accuracy)
0.05s = Training runtime
0.31s = Validation runtime
Fitting model: LightGBMXT ...
0.7997 = Validation score (accuracy)
13.14s = Training runtime
0.07s = Validation runtime
Fitting model: LightGBM ...
0.7829 = Validation score (accuracy)
4.43s = Training runtime
0.02s = Validation runtime
Fitting model: RandomForestGini ...
0.7425 = Validation score (accuracy)
3.34s = Training runtime
0.04s = Validation runtime
Fitting model: RandomForestEntr ...
0.7488 = Validation score (accuracy)
4.8s = Training runtime
0.04s = Validation runtime
Fitting model: CatBoost ...
0.7944 = Validation score (accuracy)
65.85s = Training runtime
0.01s = Validation runtime
Fitting model: ExtraTreesGini ...
0.7493 = Validation score (accuracy)
0.7s = Training runtime
0.04s = Validation runtime
Fitting model: ExtraTreesEntr ...
0.7472 = Validation score (accuracy)
0.72s = Training runtime
0.04s = Validation runtime
Fitting model: NeuralNetFastAI ...
0.8464 = Validation score (accuracy)
9.77s = Training runtime
0.02s = Validation runtime
Fitting model: XGBoost ...
0.7908 = Validation score (accuracy)
7.2s = Training runtime
0.02s = Validation runtime
Fitting model: NeuralNetTorch ...
0.8311 = Validation score (accuracy)
19.99s = Training runtime
0.01s = Validation runtime
Fitting model: LightGBMLarge ...
0.7834 = Validation score (accuracy)
8.34s = Training runtime
0.02s = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
0.8542 = Validation score (accuracy)
0.72s = Training runtime
0.0s = Validation runtime
AutoGluon training complete, total runtime = 141.06s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240127_073500/")
[1000] valid_set's binary_error: 0.227583
[2000] valid_set's binary_error: 0.214997
[3000] valid_set's binary_error: 0.209229
[4000] valid_set's binary_error: 0.203461
[5000] valid_set's binary_error: 0.201888
[6000] valid_set's binary_error: 0.203985
[1000] valid_set's binary_error: 0.226534
|   | accuracy_score | precision_score | recall_score | f1_score | roc_auc_score |
|---|---|---|---|---|---|
| 0 | 0.591777 | 0.82311 | 0.240517 | 0.372258 | 0.594076 |
classes = [WeightedL1Embedder]
for cl in classes:
    embeddings_train = cl(keyed_vectors=model_train.wv)

train_embeddings = [embeddings_train[str(edgs[x][0]), str(edgs[x][1])] for x in train_edges]
test_embeddings = [embeddings_train[str(edgs[x][0]), str(edgs[x][1])] for x in test_edges]

# create the feature DataFrame
columns = [f'X_{i}' for i in range(np.array(train_embeddings).shape[1])]
df_data = pd.DataFrame(data=train_embeddings, columns=columns)

df_labels = pd.DataFrame(data=train_labels, columns=['label'])

# concatenate the DataFrames
df = pd.concat([df_data, df_labels], axis=1)

label = np.array(train_labels)

predictr = TabularPredictor(label='label')
predictr.fit(df)

test = np.array(test_embeddings)
columns = [f'X_{i}' for i in range(test.shape[1])]

# create the test DataFrame
test_df = pd.DataFrame(data=test, columns=columns)
y = np.array(test_labels)
yhat = predictr.predict(test_df)

evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240127_073726/"
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240127_073726/"
AutoGluon Version: 0.8.2
Python Version: 3.8.18
Operating System: Linux
Platform Machine: x86_64
Platform Version: #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov 2 18:01:13 UTC 2
Disk Space Avail: 599.32 GB / 982.82 GB (61.0%)
Train Data Rows: 19067
Train Data Columns: 128
Label Column: label
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
2 unique label values: [0, 1]
If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping: class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
Available Memory: 18658.69 MB
Train Data (Original) Memory Usage: 9.76 MB (0.1% of available memory)
Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
Stage 1 Generators:
Fitting AsTypeFeatureGenerator...
Stage 2 Generators:
Fitting FillNaFeatureGenerator...
Stage 3 Generators:
Fitting IdentityFeatureGenerator...
Stage 4 Generators:
Fitting DropUniqueFeatureGenerator...
Stage 5 Generators:
Fitting DropDuplicatesFeatureGenerator...
Types of features in original data (raw dtype, special dtypes):
('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
Types of features in processed data (raw dtype, special dtypes):
('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
0.3s = Fit runtime
128 features in original data used to generate 128 features in processed data.
Train Data (Processed) Memory Usage: 9.76 MB (0.1% of available memory)
Data preprocessing and feature engineering runtime = 0.28s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 17160, Val Rows: 1907
User-specified model hyperparameters to be fit:
{
'NN_TORCH': {},
'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
'CAT': {},
'XGB': {},
'FASTAI': {},
'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif ...
0.5501 = Validation score (accuracy)
0.05s = Training runtime
0.29s = Validation runtime
Fitting model: KNeighborsDist ...
0.5506 = Validation score (accuracy)
0.05s = Training runtime
0.3s = Validation runtime
Fitting model: LightGBMXT ...
0.646 = Validation score (accuracy)
1.16s = Training runtime
0.01s = Validation runtime
Fitting model: LightGBM ...
0.6403 = Validation score (accuracy)
1.03s = Training runtime
0.01s = Validation runtime
Fitting model: RandomForestGini ...
0.635 = Validation score (accuracy)
3.6s = Training runtime
0.04s = Validation runtime
Fitting model: RandomForestEntr ...
0.6392 = Validation score (accuracy)
5.08s = Training runtime
0.04s = Validation runtime
Fitting model: CatBoost ...
0.656 = Validation score (accuracy)
4.77s = Training runtime
0.0s = Validation runtime
Fitting model: ExtraTreesGini ...
0.6329 = Validation score (accuracy)
0.69s = Training runtime
0.04s = Validation runtime
Fitting model: ExtraTreesEntr ...
0.6382 = Validation score (accuracy)
0.73s = Training runtime
0.04s = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 3: early stopping
0.6361 = Validation score (accuracy)
9.47s = Training runtime
0.02s = Validation runtime
Fitting model: XGBoost ...
0.6319 = Validation score (accuracy)
2.19s = Training runtime
0.01s = Validation runtime
Fitting model: NeuralNetTorch ...
0.6518 = Validation score (accuracy)
5.81s = Training runtime
0.08s = Validation runtime
Fitting model: LightGBMLarge ...
0.6408 = Validation score (accuracy)
3.64s = Training runtime
0.01s = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
0.6686 = Validation score (accuracy)
0.73s = Training runtime
0.0s = Validation runtime
AutoGluon training complete, total runtime = 40.83s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240127_073726/")
|   | accuracy_score | precision_score | recall_score | f1_score | roc_auc_score |
|---|---|---|---|---|---|
| 0 | 0.496119 | 0.0 | 0.0 | 0.0 | 0.499367 |
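The three cells above repeat the same pipeline with only the embedder class changed. A compact way to compare all four edge embedders in one pass (same variables as above; `WeightedL2Embedder` added for completeness) would be a sketch like:

results = {}
for cl in [HadamardEmbedder, AverageEmbedder, WeightedL1Embedder, WeightedL2Embedder]:
    emb = cl(keyed_vectors=model_train.wv)
    tr = [emb[str(edgs[x][0]), str(edgs[x][1])] for x in train_edges]
    te = [emb[str(edgs[x][0]), str(edgs[x][1])] for x in test_edges]
    cols = [f'X_{i}' for i in range(np.array(tr).shape[1])]
    df_tr = pd.concat([pd.DataFrame(tr, columns=cols),
                       pd.DataFrame({'label': train_labels})], axis=1)
    pred = TabularPredictor(label='label').fit(df_tr)
    yhat_cl = pred.predict(pd.DataFrame(te, columns=cols))
    results[cl.__name__] = evaluation(np.array(test_labels), yhat_cl).iloc[0]
pd.DataFrame(results).T  # one row per embedder: accuracy, precision, recall, f1, roc_auc

On the runs above, AverageEmbedder is the only edge embedding that clearly beats chance on the held-out edges (accuracy ≈ 0.59, ROC AUC ≈ 0.59), while HadamardEmbedder and WeightedL1Embedder stay near 0.50.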